This sample R Markdown file follows the workshop and provides a template for the data science workflow that we learned in class:
# Setup: attach the packages used by this analysis.
#
# This script deliberately does not clear the workspace (`rm(list = ls())`)
# or change the working directory (`setwd()`): the former destroys whatever
# the caller had in their session, and the latter pins the script to one
# machine-specific path and stops with an error everywhere else. The input
# data is read from URLs, so no particular working directory is required to
# load it.
# NOTE(review): if later steps write files, build their paths explicitly
# (e.g. with file.path()) rather than relying on setwd().
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(ggplot2)
library(RColorBrewer)
# Read the Kaggle house-prices training and test sets from GitHub.
# `header = TRUE` is spelled out in full: the abbreviated `head = T` only
# works through partial argument matching, and `T` is an ordinary variable
# that can be reassigned.
df_train <- read.csv(
  "https://raw.githubusercontent.com/vuminhtue/SMU_Data_Science_workflow_R/master/data/Kaggle_house_prices/train.csv",
  header = TRUE
)
df_test <- read.csv(
  "https://raw.githubusercontent.com/vuminhtue/SMU_Data_Science_workflow_R/master/data/Kaggle_house_prices/test.csv",
  header = TRUE
)

# Keep a small subset of the training columns for exploratory plotting.
# NOTE(review): "X1stFlrSF" is presumably read.csv()'s syntactic-name version
# of a CSV column that starts with a digit -- confirm against the file header.
df_small <- df_train[c(
  "OverallQual", "OverallCond", "YearBuilt", "X1stFlrSF",
  "FullBath", "GarageCars", "SaleCondition", "SalePrice"
)]
# Pairs plot of every column in df_small, coloured by SaleCondition.
ggpairs(data = df_small, mapping = aes(colour = SaleCondition))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.